* Reset settings and initialize log file
launch, path("share/recurrence")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Estimate the excess recurrence of job separations at a 12-month
*              frequency (defined as in Coglianese and Price 2021), separately
*              by sex and by the month of separation.
*-------------------------------------------------------------------------------


* Prepare the data
*-------------------------------------------------------------------------------

* Load data on adult individuals
gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear

* Restrict to the variables we need
keep pid hid tm year month mish wtraked linked_complete weeks female emp

* Restrict to individuals with valid longitudinal links throughout
keep if linked_complete == 1
drop linked_complete

* Construct an indicator for exits from employment
gen byte sep = (L.emp == 1 & emp == 0) if !missing(L.emp)

* Count the total number of separations in the dataset
quietly count if sep == 1
local agg_seps = r(N)

* Count the total number of separations observed to date/ever for each person
bysort pid (tm): gen int sum_seps = sum(sep)
bysort pid (tm): gegen int tot_seps = total(sep)

* Drop individuals who never experience a separation
drop if tot_seps == 0

* Create one replicant panel per separation
expand = tot_seps
bysort pid tm: gen int replicant = _n
gegen long rid = group(pid replicant)

* Identify the timing of each focal separation
bysort rid (tm): gegen int tm0 = total(tm * (sep == 1 & sum_seps == replicant))
bysort rid (tm): gegen byte mish0 = total(mish * (sep == 1 & sum_seps == replicant))

* Define event time around the focal separation
gen byte k = tm - tm0

* Confirm that a dedicated panel now exists for every separation in the data
quietly count if k == 0
assert r(N) == `agg_seps'

* Restrict to cases in which the focal separation occurred in the first observation window
assert inlist(mish0, 2, 3, 4, 6, 7, 8)
keep if inlist(mish0, 2, 3, 4)

* Record the month of the baseline separation
gen byte month0 = month(dofm(tm0))

* Focus on the interval around 12 months after the baseline separation
keep if inrange(k, 10, 14)


* Estimate excess recurrence
*-------------------------------------------------------------------------------

* Normalize the base month
fvset base 11 k

* Loop over sex
foreach f of numlist 0 1 {
	local fcond "female == `f'"

	* Create matrix to store estimates of excess recurrence
	matrix coefs_f`f' = J(12, 2, .)
	forvalues m = 1/12 {
		* Estimate excess recurrence, as defined in Coglianese and Price (2021)
		quietly reg sep ibn.k weeks if `fcond' & month0 == `m' [aw = wtraked], vce(cluster hid) noconstant
		quietly nlcom (exr: _b[12.k] - (_b[11.k] + _b[13.k])/2)

		* Store the point estimate and standard error
		matrix b = r(b)
		matrix V = r(V)
		matrix coefs_f`f'[`m', 1] = b[1, 1]
		matrix coefs_f`f'[`m', 2] = sqrt(V[1, 1])
	}
}


* Plot excess recurrence by the calendar month of the focal separation
*-------------------------------------------------------------------------------

* Prepare background shading
clear
set obs 13
gen rlow = -2.25
gen rupp = 6.25
gen rval = _n -0.5

* Plot estimates
#delimit ;
coefplot
	(matrix(coefs_f1[, 1]), se(coefs_f1[, 2]) offset(-.05) msymbol($sym1) mcolor($col1) ciopts(lcolor($col1)) label("Women"))
	(matrix(coefs_f0[, 1]), se(coefs_f0[, 2]) offset(+.05) msymbol($sym2) mcolor($col2) ciopts(lcolor($col2)) label("Men")),
	title("")
	keep(r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12)
	order(r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12)
	xtitle("")
	xscale(range(0.5 12.5))
	xlabel(1 "Jan." 2 "Feb." 3 "Mar." 4 "Apr." 5 "May" 6 "June" 7 "July" 8 "Aug." 9 "Sept." 10 "Oct." 11 "Nov." 12 "Dec.")
	xtick(1(1)12)
	ytitle("Excess probability of separation (p.p.)")
	yscale(range(-2.25 6.25))
	ylabel(-2(2)6)
	addplot(
		rarea rupp rlow rval if inrange(rval, 4.5, 9.5), color($ltgs) plotregion(margin(t=0 b=0)) below ||
		scatteri 0 0.5 0 12.5, recast(line) color(black) below)
	rescale(100)
	vertical
	legend(rows(1));
#delimit cr

nicepdf "$basepath/output/recurrence.pdf", indirect replace

* Report the point estimates
foreach f of numlist 0 1 {
	if `f' == 0 display _n "Men" _n
	if `f' == 1 display _n "Women" _n

	foreach m of numlist 1/12 {
		local beta = strtrim(string(100 * el(coefs_f`f', `m', 1), "%9.1f"))
		display "Month `m': `beta'"
	}
}

* Close the log file
unlaunch
